- Notifications
You must be signed in to change notification settings - Fork 849
/
Copy pathOML4R Feature Selection Algorithm-based.dsnb
executable file
·1 lines (1 loc) · 8.05 KB
/
OML4R Feature Selection Algorithm-based.dsnb
1
[{"layout":null,"template":null,"templateConfig":null,"name":"OML4R Feature Selection Algorithm-based","description":null,"readOnly":false,"type":"low","paragraphs":[{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":null,"title":null,"message":["%md"," "],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","# OML4R Feature Selection: Supervised Algorithm","","In this notebook, we demonstrate how to perform feature selection using in-database supervised algorithms via OML4R.","","Using the customer insurance lifetime value data set, which contains customer financial information, lifetime value, and whether or not the customer bought insurance, we apply the Attribute Importance algorithm to rank the predictors of whether the customer buys insurance, use the importance scores to perform feature selection, and then build a Naive Bayes model on the selected attributes. ","","The dataset `CUSTOMER_INSURANCE_LTV` is generated by the `\"OML Run-me-first\"` notebook, which `MUST` be run before this notebook.","","---","###### `IMPORTANT`: The `\"OML Run-me-first\"` notebook is available under the menu Templates -> Examples and is a prerequisite to the current notebook.","---","","Copyright (c) 2024 Oracle Corporation ","###### <a href=\"https://oss.oracle.com/licenses/upl/\" onclick=\"return ! 
window.open('https://oss.oracle.com/licenses/upl/');\">The Universal Permissive License (UPL), Version 1.0<\/a>","---"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"For more information...","message":["%md","","* <a href=\"https://docs.oracle.com/en/cloud/paas/autonomous-data-warehouse-cloud/index.html\" target=\"_blank\">Oracle ADB Documentation<\/a>","* <a href=\"https://github.com/oracle-samples/oracle-db-examples/tree/main/machine-learning\" target=\"_blank\">OML folder on Oracle GitHub<\/a>","* <a href=\"https://www.oracle.com/machine-learning\" target=\"_blank\">OML Web Page<\/a>","* <a href=\"https://www.oracle.com/goto/ml-classification\" target=\"_blank\">OML Classification<\/a>","* <a href=\"https://oracle.com/goto/ml-random-forest\" target=\"_blank\">OML Random Forest<\/a>","* <a href=\"https://docs.oracle.com/en/database/oracle/machine-learning/oml4py/2/mlpug/random-forest.html\" target=\"_blank\">OML4Py Random Forest<\/a>"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Import libraries and set display options","message":["%r","","library(ORE)","library(lattice)","options(ore.warn.order=FALSE)"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Get a proxy object to the CUSTOMER_INSURANCE_LTV table and assign 
rownames for row indexing","message":["%r","","ore.sync(table = 'CUSTOMER_INSURANCE_LTV', use.keys= TRUE)","ore.attach()","rownames(CUSTOMER_INSURANCE_LTV) <- CUSTOMER_INSURANCE_LTV$CUSTOMER_ID"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Display a few rows from CUSTOMER_INSURANCE_LTV table","message":["%r","","z.show(head(CUSTOMER_INSURANCE_LTV))"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Create Train and Test (60/40 split) datasets","message":["%r","","set.seed(2)","sampleSize <- nrow(CUSTOMER_INSURANCE_LTV)*.4","ind <- sample(1:nrow(CUSTOMER_INSURANCE_LTV),sampleSize)","group <- as.integer(1:nrow(CUSTOMER_INSURANCE_LTV) %in% ind)","","CUSTOMER_INSURANCE_LTV.train <- CUSTOMER_INSURANCE_LTV[group==FALSE,]","CUSTOMER_INSURANCE_LTV.test <- CUSTOMER_INSURANCE_LTV[group==TRUE,]"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Apply Attribute Importance algorithm to obtain the attribute importance","message":["%r","","ai_mod <- ore.odmAI(BUY_INSURANCE ~., CUSTOMER_INSURANCE_LTV.train)","","ai_mod$importance$variable <- row.names(ai_mod$importance)","z.show(head(ai_mod$importance), 
10)"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Get the attributes whose Attribute Importance score is at or above the threshold value","message":["%r","","threshold <- 0.089","features <- rownames(ai_mod$importance)[ai_mod$importance$importance >= threshold]","","z.show(head(ai_mod$importance, n = 10))"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Add the target variable to the attribute vector","message":["%r","","features <- c(features, \"BUY_INSURANCE\")"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Create a new ORE Frame with top attributes for predicting the target variable","message":["%r","","DF_NEW <- subset(CUSTOMER_INSURANCE_LTV.train, select = features)","z.show(head(DF_NEW))"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Build a Naive Bayes model to predict who will purchase insurance using the top attributes","message":["%r","","mod <- ore.odmNB(BUY_INSURANCE~., 
DF_NEW)","mod"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","## End of script"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":null,"title":null,"message":["%md"," "],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"}],"version":"6","snapshot":false,"tags":null}]